# -*- coding: utf-8 -*-
import pdfplumber
 
'''
Script that extracts the text from a PDF and writes it to a .txt file encoded as UTF-8.
'''
 
def pdf2txt(pdf_path) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``pdfplumber.open``.

    Returns:
        The extracted text of all pages joined with no separator between
        pages. Pages that yield no text contribute nothing, so the result is
        an empty string when no page has extractable text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may return None for a page without extractable text
        # (e.g. a scanned image); treat that as an empty string so the
        # concatenation does not raise TypeError.
        return "".join(page.extract_text() or "" for page in pdf.pages)

local_in_text = pdf2txt("./pdf/test.pdf")

# The text is encoded exactly once, by the file object below. A separate
# strict local_in_text.encode('utf-8') call here would throw its result away
# and could raise UnicodeEncodeError before the tolerant write is reached.
# errors='ignore' drops any character that cannot be encoded as UTF-8 instead
# of aborting the whole conversion.
with open("./pdf/test.txt", "w", encoding='utf-8', errors='ignore') as f:
    f.write(local_in_text)